########## Figure 1: Financial concerns #########
## This script replicates Figure 1 of "Do financial concerns make workers less productive?" by
## Supreet Kaur, Sendhil Mullainathan, Suanna Oh, Frank Schilbach
####################################################

# Start from an empty workspace: remove every object that ls() reports in the
# current environment before anything below is defined.
rm(list= ls())

# Detach every attached package except the core set listed in
# `basic.packages`, so the script starts from a known search path.
#
# Takes no arguments. Called for its side effect on the search path;
# returns NULL invisibly.
detachAllPackages <- function() {

  basic.packages <- c("package:stats", "package:graphics", "package:grDevices",
                      "package:utils", "package:datasets", "package:methods",
                      "package:base")

  attached <- search()

  # startsWith() yields exactly one logical per search-path entry. The earlier
  # unlist(gregexpr(...)) form yields one value per *match*, so its mask could
  # end up longer than (and misaligned with) search().
  package.list <- setdiff(attached[startsWith(attached, "package:")],
                          basic.packages)

  # A for loop over an empty vector is a no-op, so no length guard is needed.
  for (package in package.list) {
    detach(package, character.only = TRUE)
  }

  invisible(NULL)
}

# Run the cleanup now, before ipak() attaches the packages this script needs.
detachAllPackages()

# Install whichever packages in `pkg` are not installed yet, then attach all
# of them.
#
# pkg: character vector of package names.
#
# Returns a logical vector named by `pkg`, TRUE where the package was
# attached. A warning names every package that could not be attached, so a
# failed load is visible here instead of surfacing later as a
# "could not find function" error.
ipak <- function(pkg) {
  # system.file() returns "" for a package that cannot be found; this avoids
  # scanning the whole library with installed.packages().
  is.installed <- vapply(pkg, function(p) nzchar(system.file(package = p)),
                         logical(1))
  new.pkg <- pkg[!is.installed]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }

  # vapply() guarantees a logical vector even when `pkg` is empty
  # (sapply() would return a list in that case).
  loaded <- vapply(pkg, require, logical(1), character.only = TRUE)
  if (!all(loaded)) {
    warning("Could not load package(s): ",
            paste(pkg[!loaded], collapse = ", "), call. = FALSE)
  }
  loaded
}

### Packages required by this script; ipak() installs the missing ones and attaches them all
packages <- c(
  "rio", "SnowballC", "wordcloud2", "RColorBrewer", "tm", "hunspell",
  "tidytext", "ggplot2", "tibble", "tidyverse", "htmlwidgets", "webshot"
)
ipak(packages)

############ Panel D: Word cloud
# Turns the free-text answers in c4b_think_money_issue (rounds <= 15) into a
# word cloud of the most frequent issues and writes it to
# <path.out>/Figure_1_D.html and <path.out>/Figure_1_D.png.
dir <- "/Users/jackcavanagh/Dropbox (MIT)/Financial_Strain_Productivity/Proposed_publication_package"
path.out <- paste0(dir, "/Output/Main/Figures")

# NOTE(review): the folder is spelled "data" here but "Data" where
# analysis_all.dta is loaded for panels A-C; on a case-sensitive file system
# only one of the two can exist -- confirm which is right.
df.all.raw <- import(file.path(dir, "data/financial_strain.dta")) %>% as_tibble()

# One row per distinct (pid, round, answer), without empty answers and
# without the "-222" value (presumably a missing-value code -- confirm).
df.issue.0 <- df.all.raw %>%
  filter(round <= 15) %>%
  distinct(pid, round, c4b_think_money_issue) %>%
  dplyr::rename(issue = c4b_think_money_issue) %>%
  filter(issue != "" & issue != "-222")

# Number of answers before they are split into individual issues.
# Not used further down in this section.
n.pids <- nrow(df.issue.0)

# Split every answer into separate issues on "/", "AND" or ",".
df.issue.0 <- df.issue.0 %>%
  unnest_tokens(issue, issue, token = "regex", pattern = "/|AND|,") %>%
  filter(issue != "nothing")

# Eliminating stop words, then mapping spelling and phrasing variants onto a
# single wording so that equivalent answers are counted together.
# str_replace() rewrites only the first match within each answer.
issue_no_stop <- Corpus(VectorSource(df.issue.0$issue)) %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(removeWords, c("due")) %>%
  tm_map(str_replace, pattern = "fooding", replacement = "food") %>%
  tm_map(str_replace, pattern = "manage|worried|-222", replacement = "") %>%
  tm_map(str_replace, pattern = "childrens education|childrens study|education son|daughters study|children education", replacement = "education children") %>%
  tm_map(str_replace, pattern = "daughters marriage|daughter marriage", replacement = "marriage daughter") %>%
  tm_map(str_replace, pattern = "son marriage", replacement = "marriage son") %>%
  tm_map(str_replace, pattern = "building", replacement = "build") %>%
  tm_map(str_replace, pattern = "future children|future son", replacement = "childrens future") %>%
  tm_map(str_replace, pattern = "pay credit|repay loan", replacement = "pay loan") %>%
  tm_map(str_replace, pattern = "brothers marriage", replacement = "marriage brother") %>%
  tm_map(str_replace, pattern = "educations", replacement = "education") %>%
  tm_map(str_replace, pattern = "health issues|health problems|health problem", replacement = "health issue") %>%
  # An optional "pay " prefix and an optional plural "s" both collapse to
  # "loan". The earlier pattern "loan|pay loan|loans" could never rewrite
  # "loans": its first alternative matched the "loan" prefix and left the
  # trailing "s" in place.
  tm_map(str_replace, pattern = "(pay )?loans?", replacement = "loan") %>%
  tm_map(str_replace, pattern = "get work", replacement = "find work") %>%
  tm_map(str_replace, pattern = "making home|house construction work|make home", replacement = "construction house") %>%
  tm_map(str_replace, pattern = "medical expenses", replacement = "health expenses") %>%
  # Pull the cleaned text back out of the corpus and collapse extra whitespace.
  sapply(identity) %>%
  sapply(str_squish)

# Attach the cleaned wording to each issue and record how often it occurs.
df.issue.1 <- df.issue.0 %>%
  mutate(issue_no_stop = issue_no_stop,
         issue_no_stop = issue_no_stop %>% recode("health" = "health issue")) %>%
  group_by(issue_no_stop) %>%
  dplyr::mutate(fr_issues = n()) %>%
  arrange(desc(fr_issues), issue_no_stop)

# One row per cleaned issue with its frequency `n`.
df.issue.agg <- df.issue.1 %>%
  count(issue_no_stop, sort = TRUE) %>%
  dplyr::rename(issue_final = issue_no_stop)

# Keep the 100 most frequent issues. head() stops at the last row when there
# are fewer than 100, whereas indexing with 1:100 would reach past the end.
df.issue.agg <- df.issue.agg[order(-df.issue.agg$n), ]
df.issue.agg <- head(df.issue.agg, 100)

set.seed(1237)
WC <- wordcloud2(df.issue.agg, color = brewer.pal(9, "Paired"))

wcpath <- paste0(path.out, "/Figure_1_D")
saveWidget(WC, paste0(wcpath, ".html"), selfcontained = FALSE)
# and in png: webshot renders the saved html page through PhantomJS
webshot::install_phantomjs()
webshot(paste0(wcpath, ".html"), paste0(wcpath, ".png"), delay = 5, vwidth = 480, vheight = 480)

############# Figure 1, panels A-C ################
# Wipe the workspace so nothing from the word-cloud section carries over,
# then rebuild the paths, the shared plot theme and the input data.
rm(list = ls())

dir <- "/Users/jackcavanagh/Dropbox (MIT)/Financial_Strain_Productivity/Proposed_publication_package"
path.out <- paste0(dir, "/Output/Main/Figures")

# Shared look for the bar charts: theme_bw() without border, axis lines,
# y-axis ticks, vertical grid lines or minor horizontal grid lines.
mytheme <- theme_bw() +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(color = "black"),
    axis.line = element_blank(),
    axis.ticks.y = element_blank(),
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank()
  )

df.all.raw <- as_tibble(import(file.path(dir, "Data/analysis_all.dta")))

# Distinct combinations of the identifiers and the q1a_/q1c_ survey columns,
# ordered by pid and restricted to rounds <= 15.
df.all.0 <- df.all.raw %>%
  distinct(
    pid, round, wavenum, survsample, cash,
    q1a_think_work_today1, q1a_think_work_today2,
    q1a_think_work_today3, q1a_think_work_today4,
    q1a_think_work_today_oth,
    q1c_think_about1, q1c_think_about2,
    q1c_think_about3, q1c_think_about4,
    q1c_think_about5, q1c_think_about6,
    q1c_think_about7
  ) %>%
  arrange(pid) %>%
  filter(round <= 15)

# Panel A: share of participant-rounds in each future_fin_worry category.
# The variable's value labels are replaced by short two-line axis labels.
labs <- attributes(df.all.raw$future_fin_worry)$labels
labs.df <- tibble(lab.name = names(labs), future_fin_worry = as.numeric(labs)) %>%
  mutate(lab.name = c("Not \n worried", "Little \n worried", "Quite \n worried", "Very \n worried"))

# Rows with any missing value are dropped before n.pids is counted, so the
# shares are taken over complete answers only.
df.g1 <- df.all.raw %>%
  filter(round <= 15) %>%
  distinct(pid, round, future_fin_worry) %>%
  left_join(labs.df, by = "future_fin_worry") %>%
  na.omit() %>%
  mutate(
    future_fin_worry_fct = factor(
      lab.name,
      levels = c("Very \n worried", "Quite \n worried", "Little \n worried", "Not \n worried")
    ),
    n.pids = n()
  ) %>%
  group_by(future_fin_worry_fct, n.pids) %>%
  summarise(freq = n()) %>%
  mutate(
    prob = freq / n.pids * 100,
    # standard error of a share expressed in percent
    se = (prob * (100 - prob) / n.pids)^0.5,
    share = prob
  )

n.pids <- df.g1$n.pids[1]

# Bars with 95% intervals; the lower end of each interval is floored at 0.
p1 <- ggplot(df.g1, aes(future_fin_worry_fct, share)) +
  geom_bar(stat = "identity", color = "black", fill = "deepskyblue4") +
  geom_errorbar(
    aes(ymin = pmax(prob - 1.96 * se, 0), ymax = prob + 1.96 * se),
    width = .1, size = .7
  ) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20), expand = c(0, 0)) +
  labs(x = "",
       title = "",
       y = "Share of participants (%)") +
  mytheme +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        text = element_text(size = 23))
p1

ggsave(file.path(path.out, "Figure_1_A.png"), plot = p1, width = 9, height = 6)

##### Panel B ######
# Share of survey-sample participant-rounds (survsample == 1, rounds <= 15)
# in each c3b_often_money_issue category, saved as Figure_1_B.png.
labs <- attributes(df.all.raw$c3b_often_money_issue)$labels
# Two of the six value labels are both renamed "Every day", so those two
# codes end up pooled into a single bar.
labs.df <- tibble(lab.name = names(labs), c3b_often_money_issue = as.numeric(labs)) %>%
  mutate(lab.name = c("Missing", "No answer", "Every day", "Every day", "A few times \n a week", "Not often"))

# NOTE(review): rows whose lab.name is NA survive the %in% filter and are
# counted in n.pids; only the final na.omit() removes them. They therefore
# enter the denominator of every share, unlike panel A where na.omit() runs
# before n.pids is computed -- confirm this is intended.
df.g2 <- df.all.raw %>%
  filter(round <= 15 & survsample == 1) %>%
  distinct(pid, round, c3b_often_money_issue) %>%
  left_join(labs.df, by = "c3b_often_money_issue") %>%
  filter(!lab.name %in% c("Missing", "No answer")) %>%
  mutate(worry_freq = factor(lab.name, levels = c("Every day", "A few times \n a week", "Not often")),
         n.pids = n()) %>%
  group_by(worry_freq, n.pids) %>%
  summarise(freq = n()) %>%
  mutate(prob = freq / n.pids * 100,
         # standard error of a share expressed in percent
         se = (prob * (100 - prob) / n.pids)^0.5,
         share = prob) %>%
  na.omit() %>%
  ungroup()

n.pids <- df.g2$n.pids[1]

# Bars with 95% intervals; the lower end of each interval is floored at 0.
p2 <- ggplot(df.g2, aes(worry_freq, share)) +
  mytheme +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        text = element_text(size = 23)) +
  geom_bar(stat = "identity", color = "black", fill = "deepskyblue4") +
  geom_errorbar(aes(ymin = pmax(prob - 1.96 * se, 0), ymax = prob + 1.96 * se),
                width = .1, size = .7) +
  labs(x = "",
       title = "",
       y = "Share of participants (%)") +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20), expand = c(0, 0))
p2

# Name the plot explicitly so the file never depends on which plot was
# built last.
ggsave(file.path(path.out, "Figure_1_B.png"), plot = p2, width = 9, height = 6)

### Panel C #####
# Share of survey-sample participant-rounds (survsample == 1, rounds <= 15)
# in each c4a_how_long_money_issue category, saved as Figure_1_C.png.
labs <- attributes(df.all.raw$c4a_how_long_money_issue)$labels
labs.df <- tibble(lab.name = names(labs), c4a_how_long_money_issue = as.numeric(labs)) %>%
  mutate(lab.name = c("Missing", "Whole day", "A few \n hours", "Around \n one hour", "A few \n minutes"))

# NOTE(review): rows whose lab.name is NA survive the %in% filter and are
# counted in n.pids; only the final na.omit() removes them. They therefore
# enter the denominator of every share, unlike panel A where na.omit() runs
# before n.pids is computed -- confirm this is intended.
df.g4 <- df.all.raw %>%
  filter(round <= 15 & survsample == 1) %>%
  distinct(pid, round, c4a_how_long_money_issue) %>%
  left_join(labs.df, by = "c4a_how_long_money_issue") %>%
  filter(!lab.name %in% c("Missing", "No answer")) %>%
  # Bars follow the order of the relabelled value labels.
  mutate(worry_freq = factor(lab.name, levels = labs.df[["lab.name"]]),
         n.pids = n()) %>%
  group_by(worry_freq, n.pids) %>%
  summarise(freq = n()) %>%
  mutate(prob = freq / n.pids * 100,
         # standard error of a share expressed in percent
         se = (prob * (100 - prob) / n.pids)^0.5,
         share = prob) %>%
  na.omit() %>%
  ungroup()

n.pids <- df.g4$n.pids[1]

# Bars with 95% intervals; the lower end of each interval is floored at 0.
p4 <- ggplot(df.g4, aes(worry_freq, share)) +
  mytheme +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        text = element_text(size = 23)) +
  geom_bar(stat = "identity", color = "black", fill = "deepskyblue4") +
  geom_errorbar(aes(ymin = pmax(prob - 1.96 * se, 0), ymax = prob + 1.96 * se),
                width = .1, size = .7) +
  labs(x = "",
       title = "",
       y = "Share of participants (%)") +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20), expand = c(0, 0))
p4

# Name the plot explicitly so the file never depends on which plot was
# built last.
ggsave(file.path(path.out, "Figure_1_C.png"), plot = p4, width = 9, height = 6)


